Comparison between RNA-Seq libraries of neural progenitor cells and neurons derived from Pitt-Hopkins Syndrome patients and respective parents of matching sex.
library(DESeq2)
library(tximport)
library(tidyverse)
library(pheatmap)
library(viridis)
library(rmarkdown)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_1-1", "parent_1-2", "parent_1-3", "patient_1-1", "patient_1-2", "patient_1-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 25800 4768
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_2-1", "parent_2-2", "parent_2-3", "patient_2-1", "patient_2-2", "patient_2-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 23684 4973
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient"),
row.names = c("parent_3-1", "parent_3-2", "parent_3-3", "patient_3-1", "patient_3-2")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
The patient_3-3 sample is not being used for this analysis as it was determined to be an outlier based on previous analysis, as shown below.
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 24078 4221
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_4-1", "parent_4-2", "parent_4-3", "patient_4-1", "patient_4-2", "patient_4-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 26638 2254
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_1_n-1", "parent_1_n-2", "parent_1_n-3", "patient_1_n-1", "patient_1_n-2", "patient_1_n-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 20182 8386
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_2_n-1", "parent_2_n-2", "parent_2_n-3", "patient_2_n-1", "patient_2_n-2", "patient_2_n-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 23146 5778
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
Create design matrix and import quantifications:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_4_n-1", "parent_4_n-2", "parent_4_n-3", "patient_4_n-1", "patient_4_n-2", "patient_4_n-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
Create DESeq2 object and perform differential expression testing with apeglm shrinkage and using lfcThreshold to make the alternative hypothesis stricter:
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
dds <- DESeq(dds)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- lfcShrink(dds, coef = "condition_patient_vs_parent", type = "apeglm", lfcThreshold = 0.5)
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## computing FSOS 'false sign or small' s-values (T=0.5)
formated_res <- as.data.frame(res) %>%
rownames_to_column("gene") %>%
arrange(desc(abs(log2FoldChange))) %>%
mutate(significative = if_else(svalue < 0.005, "yes", "no")) %>%
mutate(up = if_else(log2FoldChange < 0, "parent", "patient"))
paged_table(formated_res)
table(formated_res$significative)
##
## no yes
## 25073 6500
plotMA(res)
## thresholding s-values on alpha=0.005 to color points
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition")
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)
patient_3-3 is an outlier sampleLoad data from all samples of the Parent 3 × Patient 3 comparison:
samples <- data.frame(
condition = c("parent", "parent", "parent", "patient", "patient", "patient"),
row.names = c("parent_3-1", "parent_3-2", "parent_3-3", "patient_3-1", "patient_3-2", "patient_3-3")
)
tx2gene <- read_tsv("transcript2gene.tsv")
files <- file.path("Quantifications", rownames(samples), "quant.sf")
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds <- DESeq(dds)
Visualization of the expression data via PCA and clustering of Euclidean distances reveal that patient_3-3 is an outlier:
vsd <- vst(dds, blind = TRUE)
plotPCA(vsd, intgroup = "condition") +
geom_label(aes(label = colnames(vsd)), alpha = 0.5, nudge_x = 5, nudge_y = 2)
select <- order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE)[1:5000]
df <- as.data.frame(colData(dds))
pheatmap(assay(vsd)[select, ],
cluster_rows = TRUE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = df,
border_color = NA,
color = cividis(256),
scale = "row"
)